In [5]:
%load_ext autoreload
%matplotlib nbagg
%autoreload 2
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
Lien vers data.gouv.fr : https://www.data.gouv.fr/fr/datasets/base-de-donnees-accidents-corporels-de-la-circulation/#_
Documentation de la base de données : DATA/Description_des_bases_de_donneesONISR-Annees_2005_a_2016.pdf
In [6]:
# Load the three 2016 tables in one go; the names are the ones every later
# cell relies on: dfc <- caracteristiques, dfu <- usagers, dfl <- lieux.
# NOTE(review): the files are read with pandas' default encoding — if a
# UnicodeDecodeError shows up on a fresh download, check the file encoding.
dfc, dfu, dfl = (
    pd.read_csv('./DATA/caracteristiques_2016.csv'),
    pd.read_csv('./DATA/usagers_2016.csv'),
    pd.read_csv('./DATA/lieux_2016.csv'),
)
In [7]:
# Preview the first five rows of the `caracteristiques` table.
dfc.head(n=5)
Out[7]:
In [8]:
# Preview the last five rows of the `lieux` table.
dfl.tail(n=5)
Out[8]:
In [9]:
# Preview the last five rows of the `usagers` table.
dfu.tail(n=5)
Out[9]:
In [10]:
# Build one row per `usagers` record, enriched with the characteristics and
# the location of the accident that record belongs to.
#
# The tables must be joined on the accident identifier: gluing them side by
# side with a positional concat (axis=1) pairs row i of one table with row i
# of the others, which is only meaningful if all three share the same row
# order and length.
# NOTE(review): `Num_Acc` is the accident key according to the ONISR
# documentation — confirm it is present in all three CSV files.
df = (
    dfu
    .merge(dfc, on='Num_Acc', how='left')
    .merge(dfl, on='Num_Acc', how='left')
)
# A left merge only grows when a key is duplicated on the right-hand side;
# fail loudly instead of silently duplicating `usagers` rows.
assert len(df) == len(dfu), "duplicate Num_Acc in dfc or dfl"
df.head()
Out[10]:
In [11]:
# Crude approach: count the rows for each `sexe` code by hand and divide by
# the total number of rows.
t, c = df.shape                 # t = row count, c = column count
h = len(df[df.sexe == 1])       # rows with sexe == 1
f = len(df[df.sexe == 2])       # rows with sexe == 2
print('h/t=', h/t)
print('f/t=', f/t)
In [12]:
# pandas approach: value_counts(normalize=True) returns the share of each
# `sexe` code directly.
df.sexe.value_counts(normalize=True)
Out[12]:
In [13]:
# Pie chart of the `sexe` split among the rows with grav == 2.
#
# value_counts() orders its result by frequency, so hard-coded positional
# labels would be swapped whenever code 2 outnumbers code 1. Sorting by code
# and renaming the index ties each label (and colour) to its code instead.
# Codes 1/2 are mapped to 'Homme'/'Femme' as in the original labels.
fig, ax = plt.subplots()
(
    df.loc[df.grav == 2, 'sexe']
    .value_counts(normalize=True)
    .sort_index()
    .rename(index={1: 'Homme', 2: 'Femme'})
    .plot.pie(ax=ax, colors=['r', 'g'], autopct='%.2f')
)
Out[13]:
In [14]:
# Share of rows for each `lum` code, ordered by code rather than by frequency.
dlum = df.lum.value_counts(normalize=True).sort_index()
In [15]:
# Display the normalized `lum` shares, sorted by code.
dlum
Out[15]:
In [16]:
# Collapse the `lum` codes into the three slices named by the labels below:
# code 1, code 2, and codes 3 to 5 summed together.
#
# Label-based `.loc` is used on purpose: a bare `series[3:5]` integer slice is
# positional on an integer index, so it would pick different codes depending
# on the index dtype. The grouped shares go into a new Series instead of
# overwriting `dlum[3]`, so re-running the cell does not add codes 4 and 5 a
# second time.
lum_share = dlum.reindex([1, 2, 3, 4, 5], fill_value=0.0)
lum_pie = pd.Series(
    [lum_share.loc[1], lum_share.loc[2], lum_share.loc[3:5].sum()],
    index=['Jour', 'Aube/crépuscule', 'Nuit'],
    name=dlum.name,
)
fig, ax = plt.subplots()
lum_pie.plot.pie(ax=ax, colors=['y', 'g', 'b'], autopct='%.2f')
Out[16]:
In [17]:
# Step-by-step approach: keep the rows flagged gps == 'M', keep only the two
# coordinate columns, then drop rows where either coordinate is exactly 0.
gps_is_m = df.gps == 'M'
dfp = df.loc[gps_is_m, ['lat', 'long']]
has_coords = dfp.long.ne(0.0) & dfp.lat.ne(0.0)
dfp = dfp[has_coords]
dfp.head()
Out[17]:
In [18]:
# Scatter of the filtered coordinates.
# DataFrame.plot opens its own figure unless it is handed an Axes, so a bare
# plt.figure() beforehand only leaves an empty figure behind. Create the Axes
# explicitly and pass it in.
fig, ax = plt.subplots()
dfp.plot.scatter(x='long', y='lat', s=1, ax=ax);
In [ ]:
# Direct approach: filter and plot in a single expression.
# The Axes is created explicitly and passed to pandas; otherwise
# DataFrame.plot opens a second figure and the one from plt.figure() stays
# empty.
fig, ax = plt.subplots()
df[(df.long!=0.0) & (df.lat!=0.0) & (df.gps=='M')].plot.scatter(x='long', y='lat', s=.5, ax=ax)
In [32]:
# Add an `age` column: 2016 (the year of the loaded files) minus the birth
# year. The birth year is read from `df` itself, so the result does not depend
# on `df` and `dfu` sharing the same index.
df = df.assign(age=2016. - df.an_nais)
# Largest age truncated to an integer, used as the histogram bin count.
# The builtin int() replaces the `np.int` alias, which was removed from NumPy.
nb_bin = int(df.age.max())
In [34]:
# Histogram of the `age` column with `nb_bin` bins, drawn on a fresh figure.
age_fig = plt.figure()
df.age.hist(bins=nb_bin, ax=age_fig.gca())
plt.show()
In [35]:
# Same coordinate scatter as before, with each point coloured by `age`.
# The Axes is created explicitly and passed to pandas; otherwise
# DataFrame.plot opens a second figure and the one from plt.figure() stays
# empty.
fig, ax = plt.subplots()
df[(df.long!=0.0) & (df.lat!=0.0) & (df.gps=='M')].plot.scatter(x='long', y='lat', c='age', s=.5, ax=ax)
Out[35]:
In [ ]:
In [ ]: